#load some required libraries
library(mfp)
library(bfp)
# Working directory for locally stored data files.
# Not used below: both data files are read straight from URLs.
workdir <- ""

# Load the raw abalone table; the file has no header row, so read.csv names
# the columns V1, V2, ...
data.example <- read.csv(
  "https://raw.githubusercontent.com/aliaksah/EMJMCMC2016/master/supplementaries/BGNLM/abalone%20age/abalone.data",
  header = FALSE
)

# Encode the code in V1 as two 0/1 indicator columns ("M" and "F");
# any other code is 0 in both.
data.example[["MS"]] <- as.integer(data.example[["V1"]] == "M")
data.example[["FS"]] <- as.integer(data.example[["V1"]] == "F")

# Move the response (V9) into the first column, overwriting the original V1,
# then drop the now-duplicated V9 column.
data.example[["V1"]] <- data.example[["V9"]]
data.example[["V9"]] <- NULL

# Column names are kept exactly as in the original script (including the
# spelling of the last one), because the model formulas are built from them.
names(data.example) <- c(
  "Age", "Length", "Diameter", "Height", "WholeWeight", "ShuckedWeight",
  "VisceraWeight", "ShellWeight", "Male", "Femele"
)

set.seed(040590)

# Row indices of the test observations: second column of the ';'-separated
# index file.
teid <- read.csv(
  "https://raw.githubusercontent.com/aliaksah/EMJMCMC2016/master/supplementaries/BGNLM/abalone%20age/teid.csv",
  sep = ";"
)[, 2]

# Split into test rows and the remaining (training) rows.
test <- data.example[teid, ]
data.example <- data.example[-teid, ]
train <- data.example

# Matrix/vector view of the training data: predictors in x, response in y.
vect <- list(
  x = as.matrix(data.example[, -1]),
  y = data.example$Age
)

sum(test$Age)
gc()
#prepare the data structures for the final results
# results[method, run, metric]
#   method: 1 = mfp, 2 = BayesMfp (sparse model prior)
#   metric: 1 = RMSE, 2 = MAE, 3 = correlation,
#           4 = elapsed fitting time, 5 = elapsed prediction time
n_runs <- 100
results <- array(0, dim = c(2, n_runs, 5))
features <- names(train)[-1]

# The model formulas do not change between runs, so build them once:
#   Age ~ 1 + fp(x1) + fp(x2) + ...   (mfp)
#   Age ~ 1 + bfp(x1) + bfp(x2) + ... (BayesMfp)
response_name <- colnames(data.example)[1]
predictor_names <- colnames(data.example)[-1]
formula1 <- as.formula(paste0(
  response_name, " ~ 1 + ",
  paste0("fp(", predictor_names, ")", collapse = " + ")
))
formula2 <- as.formula(paste0(
  response_name, " ~ 1 + ",
  paste0("bfp(", predictor_names, ")", collapse = " + ")
))

# Returns c(RMSE, MAE, correlation) of predictions `pred` against `truth`.
prediction_metrics <- function(pred, truth) {
  c(
    sqrt(mean((pred - truth)^2)),
    mean(abs(pred - truth)),
    cor(pred, truth)
  )
}

for (ii in seq_len(n_runs)) {
  print(paste("iteration ", ii))

  # ---- method 1: mfp ------------------------------------------------------
  set.seed(ii)
  fit_time <- system.time({
    mod.mfp <- mfp(formula1, data = data.example, family = gaussian, select = 0.05)
    # Alternative kept from the original script (disabled): BayesMfp with a
    # "dependent" model prior in place of mfp.
    #mod.mfp = BayesMfp(formula2, data = data.example, family = gaussian, priorSpecs = list(a = 4, modelPrior = "dependent"),
    # method = 'sampling')
  })
  # Predict
  pred_time <- system.time({
    out <- predict(mod.mfp, newdata = test)
  })
  #compute and store the performance metrics and the elapsed times
  results[1, ii, 1:3] <- prediction_metrics(out, test$Age)
  results[1, ii, 4] <- fit_time[["elapsed"]]
  results[1, ii, 5] <- pred_time[["elapsed"]]

  # ---- method 2: BayesMfp with the "sparse" model prior -------------------
  # Same seed as for method 1, so both methods start from the same RNG state.
  set.seed(ii)
  fit_time <- system.time({
    mod.mfp <- BayesMfp(
      formula2,
      data = data.example, family = gaussian,
      priorSpecs = list(a = 4, modelPrior = "sparse"),
      method = "sampling"
    )
  })
  # Predict
  pred_time <- system.time({
    out <- predict(mod.mfp, newdata = test)
  })
  #compute and store the performance metrics and the elapsed times
  results[2, ii, 1:3] <- prediction_metrics(out, test$Age)
  results[2, ii, 4] <- fit_time[["elapsed"]]
  results[2, ii, 5] <- pred_time[["elapsed"]]

  # Show the RMSE of both methods for this run.
  print(results[, ii, 1])
  gc()
}
# Runs to include in the summary (all 100 runs).
ids <- 1:100
# drop = FALSE keeps the 3-d shape even if a single run is selected.
ress <- results[, ids, , drop = FALSE]

#make the joint summary of the runs, including min, max and medians of the performance metrics
# One row per method; for metric j the columns (j-1)*3 + 1:3 hold
# min, median and max over the selected runs.
n_methods <- dim(ress)[1]
n_metrics <- dim(ress)[3]
summary.results <- matrix(NA_real_, nrow = n_methods, ncol = 3 * n_metrics)
for (i in seq_len(n_methods)) {
  for (j in seq_len(n_metrics)) {
    values <- ress[i, , j]
    summary.results[i, (j - 1) * 3 + 1:3] <- c(min(values), median(values), max(values))
  }
}
summary.results <- as.data.frame(summary.results)
names(summary.results) <- c(
  "min(rmse)", "median(rmse)", "max(rmse)",
  "min(mae)", "median(mae)", "max(mae)",
  "min(corr)", "median(corr)", "max(cor)",
  "min(ltime)", "median(ltime)", "max(ltime)",
  "min(ptime)", "median(ptime)", "max(ptime)"
)
# The table has exactly one row per fitted method (mfp and BayesMfp).
# The original assigned eight labels copied from another benchmark, which
# fails on a two-row data frame ("invalid 'row.names' length").
rownames(summary.results) <- c("MFP", "BFP")

# Preview of the accuracy metrics, ordered median / min / max per metric.
print(round(summary.results[, c(2, 1, 3, 5, 4, 6, 8, 7, 9)], 4))

write.csv(x = summary.results, file = "summarycompete.csv")
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.